1 Macula Densa Project

1.1 Objectives

Three Main Goals of this File
Produce Cleaner looking code.
Identify the amount of clusters there are
Identify the top genes expressed in each of the clusters

1.2 Problems I need to Fix

Save things as RDS file so I dont have to rerun the whole code

2 Loading in Data sets + Library packages.

## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp92WlpK/downloaded_packages
options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320
## [1] 79456894976
load(here("jk_code", "JK_cleanMD.rds"))

3 Analyzing DATASET

DimPlot(SO4,split.by ="sample",group.by = "sample")

DimPlot(SO4)

ElbowPlot(SO4)

SO4 <- SCTransform(SO4) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:30) %>%
    FindClusters(resolution = 0.2) %>%
    RunUMAP(dims = 1:30)
## Running SCTransform on assay: RNA
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 22741 by 11431
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 319 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 22741 genes
## Computing corrected count matrix for 22741 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 23.90478 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Set default assay to SCT
## PC_ 1 
## Positive:  Pappa2, Malat1, Gm37376, Zfand5, Mir6236, Aard, Robo2, Wwc2, Neat1, Pde10a 
##     Itga4, Nadk2, Sdc4, Nos1, Col4a4, S100g, Col4a3, Sgms2, Irx1, Ramp3 
##     Ptgs2, Etnk1, Tmem158, Itprid2, Srrm2, Hnrnpa2b1, Bmp3, Ranbp3l, Camk2d, Zbtb20 
## Negative:  Umod, Egf, Tmem52b, Sult1d1, Klk1, Sostdc1, Fabp3, Wfdc2, Cox6c, Prdx5 
##     Foxq1, Ly6a, Wfdc15b, Atp1a1, Krt7, Slc25a5, Cox5b, Cox4i1, Ckb, Atp5g1 
##     Cox8a, Cldn19, Ndufa4, Ggt1, Atp1b1, Atp5b, Gm47708, Chchd10, mt-Nd4l, Gm44120 
## PC_ 2 
## Positive:  Malat1, Mir6236, Egf, Gm37376, CT010467.1, Umod, mt-Nd5, Nme7, Tmem52b, Neat1 
##     mt-Nd4l, mt-Rnr1, Gm24447, Slc12a1, mt-Nd6, Kcnq1ot1, mt-Nd2, mt-Co1, Atp1b1, Gm28438 
##     Lars2, Etnk1, Wnk1, Dst, mt-Nd4, mt-Rnr2, Foxq1, Sult1d1, Slc5a3, Atrx 
## Negative:  Gm22133, Fth1, Ubb, Mt1, Ldhb, Ftl1, Eif1, Cd63, Car15, H3f3b 
##     Prdx1, Mpc2, Fxyd2, Cd9, Rps24, Mgst1, Tmem213, Hspa1a, Rpl7, Mt2 
##     Itm2b, Rps5, Bsg, Clu, Rps27a, Rps7, Rpl11, Aldoa, Rpl9, Mdh1 
## PC_ 3 
## Positive:  Pappa2, Car15, Itm2b, Mgst1, Cd9, Aard, Gm22133, Ldhb, Sfrp1, Mpc2 
##     Cd63, Tmem59, Fth1, Tmem158, Bsg, Mcub, Ramp3, Tmem213, Epcam, Prdx1 
##     Tmbim6, Ly6a, Gm13340, Clu, Hsp90b1, mt-Nd1, Rpl9, Tmem176a, Fxyd2, Ndfip1 
## Negative:  Fos, Junb, Jun, Hspa1b, Egr1, Btg2, Hspa1a, Zfp36, Atf3, Ier2 
##     Fosb, Socs3, Klf6, Klf2, Jund, Dnajb1, Dusp1, Gadd45g, Rhob, Tsc22d1 
##     Gadd45b, Ubc, Cebpd, H3f3b, H1f2, Klf4, H2bc4, Malat1, Actb, Gm42793 
## PC_ 4 
## Positive:  Pappa2, Umod, Aard, Egf, Tmem52b, Sult1d1, Tmem158, Car15, Mcub, Ptgs2 
##     Wwc2, Hsp90b1, Foxq1, Dctd, Fth1, Wnk1, Gm22133, Iyd, Cd9, Ptprz1 
##     Sfrp1, Rpl15-ps2, Cdkn1c, Klk1, Sec14l1, Calr, Clu, Ramp3, Robo2, Gm44120 
## Negative:  Mt1, Gm8420, mt-Nd4l, mt-Co1, Gm8885, Rps21, Gm28438, mt-Rnr1, mt-Rnr2, Gm28437 
##     Apoe, Mt2, Rpl37, mt-Nd5, Rpl38, Gm28037, Rpl37a, Gm11808, Gm3511, Gm28661 
##     Aebp1, Gm29216, Gm24447, Gm11353, Gm10925, mt-Cytb, Atp5md, mt-Nd2, Rpl39, Gm6136 
## PC_ 5 
## Positive:  Klk1, Hspa1a, Gm22133, Hspa1b, CT010467.1, mt-Rnr2, Gm13340, Rpl15-ps2, Car15, Atp1a1 
##     Rpl13, Itm2b, Gm10925, mt-Nd4, Fth1, Cd63, Gm28437, Ptger3, Ftl1, Rpl10 
##     Hspa8, Apoe, Clcnkb, Gpx6, Gm28661, Cldn10, mt-Cytb, Rpl9, mt-Rnr1, mt-Nd5 
## Negative:  S100g, Gm8420, Gm8885, Rps21, Pappa2, Rpl41, Rpl37, Rpl38, Actb, Rpl37a 
##     Tmem52b, Gm11808, Rps12, Tpt1, Gm6136, Rpl39, Gm3511, Ppia, Gm11353, Rps27 
##     Tmsb10, Aard, Gm9616, Gm7206, Rpl23, Gm9843, Rpl5-ps1, Rps26, Rpl23a, Gm12338
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11431
## Number of edges: 378883
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8868
## Number of communities: 5
## Elapsed time: 1 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:35:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:35:04 Read 11431 rows and found 30 numeric columns
## 13:35:04 Using Annoy for neighbor search, n_neighbors = 30
## 13:35:04 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:35:05 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp92WlpK/file4de570729ee5
## 13:35:05 Searching Annoy index using 1 thread, search_k = 3000
## 13:35:06 Annoy recall = 100%
## 13:35:07 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 13:35:07 Initializing from normalized Laplacian + noise (using RSpectra)
## 13:35:07 Commencing optimization for 200 epochs, with 500908 positive edges
## 13:35:07 Using rng type: pcg
## 13:35:10 Optimization finished
DimPlot(SO4)

3.1 Viewing for different DEGs

markers.to.plot_MD <- c(
  "Egf",
  "S100g",
  "Fos",
  "Foxq1",
  "Pappa2",
  "Mcub",
  "Hmox1")


DotPlot(SO4,
features = markers.to.plot_MD,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

markers.to.plot1 <- c(
  "S100g",    # 
  "Atf3",     # 
  "Egr1",     # 
  "Fos",      # 
  "Jun",      #
  "Junb",     #
  "Pappa2",   #
  "Cxcl10",   # 
  "Cldn19",   # 
  "Krt7",     #
  "Egf",      #
  "Aard",
  "Ptger3",
  "Leng9",
  "Ckb",
  "Mcub",
  "Fabp3",
  "Ccn1",
  "Foxq1",
  "Cxcl12",
  "Vash2",
  "Pamr1",
  "Vegfa"
)

                      
                      
DotPlot(SO4,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

DimPlot(SO4)

## Finding Markers for each Cluster

SO.markers <- FindAllMarkers(SO4, only.pos = TRUE)
## Calculating cluster 0
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
SO.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
SO.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10

DoHeatmap(SO4, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(SO4, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the SCT assay: Phkg1,
## Rtp4, Gm26982, Oasl2, Rsad2, Oasl1, Il3ra, Gm3500, Abcb1a

DimPlot(SO4)

DimPlot(SO4,split.by = "sample")

## Renaming my clusters

SO4@meta.data <- SO4@meta.data %>% 
  mutate(subclass_MD = dplyr::case_when(
    seurat_clusters == 0  ~ "type_1",
    seurat_clusters == 1  ~ "type_1",
    seurat_clusters == 2  ~ "type_2",
    seurat_clusters == 3  ~ "type_3",
    seurat_clusters == 4  ~ "type_4"
  ))

SO4@meta.data$subclass_MD <- factor(SO4@meta.data$subclass_MD , levels = c("type_1", "type_2", "type_3","type_4"))

Idents(SO4) <- SO4@meta.data$subclass_MD

DimPlot(object = SO4, reduction = "umap", group.by = "subclass_MD", label = TRUE)

DimPlot(object = SO4, reduction = "umap", label = TRUE)

Idents(SO4) <- "subclass_MD"

DimPlot(SO4,group.by = "seurat_clusters")

DimPlot(SO4,group.by = "subclass_MD")

markers.to.plot3<- c(
  "Pappa2",
  "Egf",
  "Jun",
  "Cxcl10"
)

                      
                      
DotPlot(SO4,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

3.2 Finding markers between each type now

type_markers <- FindAllMarkers(SO4, only.pos = TRUE)
## Calculating cluster type_1
## Calculating cluster type_2
## Calculating cluster type_3
## Calculating cluster type_4
type_markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
type_markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10

DoHeatmap(SO4, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(SO4, features = top10$gene): The following features were
## omitted as they were not found in the scale.data slot for the SCT assay: Phkg1,
## Rtp4, Gm26982, Oasl2, Rsad2, Oasl1

DimPlot(SO4)

DimPlot(SO4,split.by = "sample")

3.3 Top genes

markers.to.plot3<- c(
  "Pappa2",
  "Egf",
  "Fos",
  "Cxcl10"
)

                      
                      
DotPlot(SO4,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

4 Save Object

save(SO4, file = here("jk_code", "SO4analysis.rds"))